<?xml version="1.0" encoding="ascii"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
          "DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
  <!-- Page title, followed by the shared stylesheet (epydoc.css) and the
       shared script file (epydoc.js) that this page loads. -->
  <title>tagger.build_dict</title>
  <link href="epydoc.css" rel="stylesheet" type="text/css" />
  <script src="epydoc.js" type="text/javascript"></script>
</head>

<body bgcolor="white" text="black" link="blue" vlink="#204080"
      alink="#204080">
<!-- ==================== NAVIGATION BAR ==================== -->
<!-- Top navigation bar: Trees, Indices and Help links, followed by a
     right-aligned cell holding the project homepage link, which opens in
     the top-level window (target="_top"). -->
<table class="navbar" border="0" width="100%" cellpadding="0"
       bgcolor="#a0c0ff" cellspacing="0">
  <tr valign="middle">

  <!-- Tree link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Index link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Help link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Project homepage (linked over HTTPS rather than relying on an
       HTTP redirect) -->
      <th class="navbar" align="right" width="100%">
        <table border="0" cellpadding="0" cellspacing="0">
          <tr><th class="navbar" align="center"
            ><a class="navbar" target="_top" href="https://github.com/apresta/tagger">tagger</a></th>
          </tr></table></th>
  </tr>
</table>
<!-- Breadcrumb trail (left cell) and display options (right cell): the
     private-member toggle and the frames / no-frames switch. -->
<table width="100%" cellpadding="0" cellspacing="0">
  <tr valign="top">
    <td width="100%">
      <span class="breadcrumbs">
        Package&nbsp;tagger ::
        Module&nbsp;build_dict
      </span>
    </td>
    <td>
      <table cellpadding="0" cellspacing="0">
        <!-- hide/show private -->
        <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
    onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
        <!-- frames / no frames: each link gets its own bracket pair, so the
             row reads "[frames] | [no frames]" -->
        <tr><td align="right"><span class="options"
            >[<a href="frames.html" target="_top">frames</a
            >]&nbsp;|&nbsp;[<a href="tagger.build_dict-module.html"
            target="_top">no&nbsp;frames</a>]</span></td></tr>
      </table>
    </td>
  </tr>
</table>
<!-- ==================== MODULE DESCRIPTION ==================== -->
<!-- Module heading with a link to the module's source-code page
     (tagger.build_dict-pysrc.html), followed by the command-line usage
     line of build_dict.py. -->
<h1 class="epydoc">Module build_dict</h1><p class="nomargin-top"><span class="codelink"><a href="tagger.build_dict-pysrc.html">source&nbsp;code</a></span></p>
<p>Usage: build_dict.py -o &lt;output file&gt; -s &lt;stopwords file&gt; 
  &lt;list of files&gt;</p>

<!-- ==================== FUNCTIONS ==================== -->
<!-- Summary table of the module's functions: one header row, then one row
     per function giving its signature (linked to the detail entry further
     down this page) and a link to its source code. -->
<a name="section-Functions"></a>
<table class="summary" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<!-- Header row: section title and the private-member toggle link -->
<tr bgcolor="#70b0f0" class="table-header">
  <td colspan="2" class="table-header">
    <table border="0" cellpadding="0" cellspacing="0" width="100%">
      <tr valign="top">
        <td align="left"><span class="table-header">Functions</span></td>
        <td align="right" valign="top"
         ><span class="options">[<a href="#section-Functions"
         class="privatelink" onclick="toggle_private();"
         >hide private</a>]</span></td>
      </tr>
    </table>
  </td>
</tr>
<!-- build_dict: signature, return description, source link -->
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a href="tagger.build_dict-module.html#build_dict" class="summary-sig-name">build_dict</a>(<span class="summary-sig-arg">corpus</span>,
        <span class="summary-sig-arg">stopwords</span>=<span class="summary-sig-default">None</span>,
        <span class="summary-sig-arg">measure</span>=<span class="summary-sig-default"><code class="variable-quote">'</code><code class="variable-string">IDF</code><code class="variable-quote">'</code></span>)</span><br />
      Returns:
      a dictionary of weights in the interval [0,1]</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="tagger.build_dict-pysrc.html#build_dict">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<!-- build_dict_from_files: signature and source link (no summary text) -->
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a href="tagger.build_dict-module.html#build_dict_from_files" class="summary-sig-name">build_dict_from_files</a>(<span class="summary-sig-arg">output_file</span>,
        <span class="summary-sig-arg">corpus_files</span>,
        <span class="summary-sig-arg">stopwords_file</span>=<span class="summary-sig-default">None</span>,
        <span class="summary-sig-arg">reader</span>=<span class="summary-sig-default">SimpleReader()</span>,
        <span class="summary-sig-arg">stemmer</span>=<span class="summary-sig-default">Stemmer()</span>,
        <span class="summary-sig-arg">measure</span>=<span class="summary-sig-default"><code class="variable-quote">'</code><code class="variable-string">IDF</code><code class="variable-quote">'</code></span>,
        <span class="summary-sig-arg">verbose</span>=<span class="summary-sig-default">False</span>)</span></td>
          <td align="right" valign="top">
            <span class="codelink"><a href="tagger.build_dict-pysrc.html#build_dict_from_files">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
</table>
<!-- ==================== VARIABLES ==================== -->
<!-- Summary table of module-level variables: a header row and a single
     entry, __package__, shown with the value 'tagger'. -->
<a name="section-Variables"></a>
<table class="summary" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<!-- Header row: section title and the private-member toggle link -->
<tr bgcolor="#70b0f0" class="table-header">
  <td colspan="2" class="table-header">
    <table border="0" cellpadding="0" cellspacing="0" width="100%">
      <tr valign="top">
        <td align="left"><span class="table-header">Variables</span></td>
        <td align="right" valign="top"
         ><span class="options">[<a href="#section-Variables"
         class="privatelink" onclick="toggle_private();"
         >hide private</a>]</span></td>
      </tr>
    </table>
  </td>
</tr>
<!-- __package__: the outer <code> repeats the value in its title attribute;
     the inner <code> elements style the quotes and the string separately -->
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
        <a name="__package__"></a><span class="summary-name">__package__</span> = <code title="'tagger'"><code class="variable-quote">'</code><code class="variable-string">tagger</code><code class="variable-quote">'</code></code>
    </td>
  </tr>
</table>
<!-- ==================== FUNCTION DETAILS ==================== -->
<!-- Banner heading for the function detail entries that follow; the
     bracketed link on the right calls toggle_private(). -->
<a name="section-FunctionDetails"></a>
<table class="details" width="100%" border="1" cellspacing="0" cellpadding="3" bgcolor="white">
<tr class="table-header" bgcolor="#70b0f0">
  <td class="table-header" colspan="2">
    <table width="100%" border="0" cellspacing="0" cellpadding="0">
      <tr valign="top">
        <td align="left"><span class="table-header">Function Details</span></td>
        <td valign="top" align="right"><span class="options">[<a
            class="privatelink" href="#section-FunctionDetails"
            onclick="toggle_private();">hide private</a>]</span></td>
      </tr>
    </table>
  </td>
</tr>
</table>
<!-- Detail entry for build_dict(): anchor target, full signature with a
     source-code link, then the parameter and return-value descriptions. -->
<a name="build_dict"></a>
<div>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr><td>
  <!-- Signature (left) and source-code link (right) -->
  <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr valign="top"><td>
  <h3 class="epydoc"><span class="sig"><span class="sig-name">build_dict</span>(<span class="sig-arg">corpus</span>,
        <span class="sig-arg">stopwords</span>=<span class="sig-default">None</span>,
        <span class="sig-arg">measure</span>=<span class="sig-default"><code class="variable-quote">'</code><code class="variable-string">IDF</code><code class="variable-quote">'</code></span>)</span>
  </h3>
  </td><td align="right" valign="top"
    ><span class="codelink"><a href="tagger.build_dict-pysrc.html#build_dict">source&nbsp;code</a></span>&nbsp;
    </td>
  </tr></table>
  
  
  <!-- Documented fields: parameters and return value -->
  <dl class="fields">
    <dt>Parameters:</dt>
    <dd><ul class="nomargin-top">
        <li><strong class="pname"><code>corpus</code></strong> - a list of documents, represented as lists of (stemmed) words</li>
        <li><strong class="pname"><code>stopwords</code></strong> - the list of (stemmed) words that should have zero weight</li>
        <li><strong class="pname"><code>measure</code></strong> - the measure used to compute the weights ('IDF' i.e. 'inverse 
          document frequency' or 'ICF' i.e. 'inverse collection frequency';
          defaults to 'IDF')</li>
    </ul></dd>
    <dt>Returns:</dt>
        <dd>a dictionary of weights in the interval [0,1]</dd>
  </dl>
</td></tr></table>
</div>
<!-- Detail entry for build_dict_from_files(): anchor target, full signature
     with a source-code link, then the parameter descriptions (no return
     value is documented for this function). -->
<a name="build_dict_from_files"></a>
<div>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr><td>
  <!-- Signature (left) and source-code link (right) -->
  <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr valign="top"><td>
  <h3 class="epydoc"><span class="sig"><span class="sig-name">build_dict_from_files</span>(<span class="sig-arg">output_file</span>,
        <span class="sig-arg">corpus_files</span>,
        <span class="sig-arg">stopwords_file</span>=<span class="sig-default">None</span>,
        <span class="sig-arg">reader</span>=<span class="sig-default">SimpleReader()</span>,
        <span class="sig-arg">stemmer</span>=<span class="sig-default">Stemmer()</span>,
        <span class="sig-arg">measure</span>=<span class="sig-default"><code class="variable-quote">'</code><code class="variable-string">IDF</code><code class="variable-quote">'</code></span>,
        <span class="sig-arg">verbose</span>=<span class="sig-default">False</span>)</span>
  </h3>
  </td><td align="right" valign="top"
    ><span class="codelink"><a href="tagger.build_dict-pysrc.html#build_dict_from_files">source&nbsp;code</a></span>&nbsp;
    </td>
  </tr></table>
  
  
  <!-- Documented fields: parameters only; reader and stemmer link to the
       Reader and Stemmer class pages -->
  <dl class="fields">
    <dt>Parameters:</dt>
    <dd><ul class="nomargin-top">
        <li><strong class="pname"><code>output_file</code></strong> - the name of the file where the dictionary should be saved</li>
        <li><strong class="pname"><code>corpus_files</code></strong> - a list of files with words to process</li>
        <li><strong class="pname"><code>stopwords_file</code></strong> - a file containing a list of stopwords</li>
        <li><strong class="pname"><code>reader</code></strong> - the <a href="tagger.tagger.Reader-class.html" 
          class="link">Reader</a> object to be used</li>
        <li><strong class="pname"><code>stemmer</code></strong> - the <a href="tagger.tagger.Stemmer-class.html" 
          class="link">Stemmer</a> object to be used</li>
        <li><strong class="pname"><code>measure</code></strong> - the measure used to compute the weights ('IDF' i.e. 'inverse 
          document frequency' or 'ICF' i.e. 'inverse collection frequency';
          defaults to 'IDF')</li>
        <li><strong class="pname"><code>verbose</code></strong> - whether information on the progress should be printed on screen</li>
    </ul></dd>
  </dl>
</td></tr></table>
</div>
<br />
<!-- ==================== NAVIGATION BAR ==================== -->
<!-- Bottom navigation bar: Trees, Indices and Help links, followed by a
     right-aligned cell holding the project homepage link, which opens in
     the top-level window (target="_top"). -->
<table class="navbar" border="0" width="100%" cellpadding="0"
       bgcolor="#a0c0ff" cellspacing="0">
  <tr valign="middle">

  <!-- Tree link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Index link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Help link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Project homepage (linked over HTTPS rather than relying on an
       HTTP redirect) -->
      <th class="navbar" align="right" width="100%">
        <table border="0" cellpadding="0" cellspacing="0">
          <tr><th class="navbar" align="center"
            ><a class="navbar" target="_top" href="https://github.com/apresta/tagger">tagger</a></th>
          </tr></table></th>
  </tr>
</table>
<!-- Page footer: generation timestamp on the left, Epydoc homepage link on
     the right. The table spans the full page width; the width value must
     be a plain percentage ("100%"), not the doubled "100%%". -->
<table border="0" cellpadding="0" cellspacing="0" width="100%">
  <tr>
    <td align="left" class="footer">
    Generated by Epydoc 3.0.1 on Wed Jun  8 01:57:42 2011
    </td>
    <td align="right" class="footer">
      <a target="mainFrame" href="http://epydoc.sourceforge.net"
        >http://epydoc.sourceforge.net</a>
    </td>
  </tr>
</table>

<!-- Inline script executed when the parser reaches the end of the body: it
     calls checkCookie(), which is not defined in this file (presumably it
     comes from epydoc.js, loaded in the head; verify there). -->
<script type="text/javascript">
  <!--
  // Private objects are initially displayed (because if
  // javascript is turned off then we want them to be
  // visible); but by default, we want to hide them.  So hide
  // them unless we have a cookie that says to show them.
  checkCookie();
  // -->
</script>
</body>
</html>
